Import Modules

In [ ]:
# Helper libraries
import datetime
from packaging import version
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from collections import Counter
import numpy as np
import pandas as pd

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
#from plot_keras_history import plot_history
 
import math
In [ ]:
%matplotlib inline

np.set_printoptions(precision=3, suppress=True) 

Load MNIST Dataset

In [ ]:
(x_train, y_train), (x_test, y_test)= tf.keras.datasets.mnist.load_data()

EDA Training and Test Sets

In [ ]:
print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
print('y_test:\t\t{}'.format(y_test.shape))
x_train:	(60000, 28, 28)
y_train:	(60000,)
x_test:		(10000, 28, 28)
y_test:		(10000,)

Review labels for training set

In [ ]:
print("First ten labels training dataset:\n {}\n".format(y_train[0:10]))
First ten labels training dataset:
 [5 0 4 1 9 2 1 3 1 4]

Find frequency of each label in training and test sets

In [ ]:
Counter(y_train).most_common()
Out[ ]:
[(1, 6742),
 (7, 6265),
 (3, 6131),
 (2, 5958),
 (9, 5949),
 (0, 5923),
 (6, 5918),
 (8, 5851),
 (4, 5842),
 (5, 5421)]
In [ ]:
Counter(y_test).most_common()
Out[ ]:
[(1, 1135),
 (2, 1032),
 (7, 1028),
 (3, 1010),
 (9, 1009),
 (4, 982),
 (0, 980),
 (8, 974),
 (6, 958),
 (5, 892)]

Plot sample images with their labels

In [ ]:
fig = plt.figure(figsize = (15, 9))

for i in range(50):
    plt.subplot(5, 10, 1+i)
    plt.title(y_train[i])
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_train[i].reshape(28,28), cmap='binary')

Apply one-hot encoding on the labels

In [ ]:
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)

print("First ten entries of y_train:\n {}\n".format(y_train[0:10]))
print("First ten rows of one-hot y_train:\n {}".format(y_train_encoded[0:10,]))
First ten entries of y_train:
 [5 0 4 1 9 2 1 3 1 4]

First ten rows of one-hot y_train:
 [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
In [ ]:
print('y_train_encoded shape: ', y_train_encoded.shape)
print('y_test_encoded shape: ', y_test_encoded.shape)
y_train_encoded shape:  (60000, 10)
y_test_encoded shape:  (10000, 10)

Reshape the images to 1D arrays

In [ ]:
# Before reshape:
print('x_train:\t{}'.format(x_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
x_train:	(60000, 28, 28)
x_test:		(10000, 28, 28)
In [ ]:
# Reshape the images:
x_train_reshaped = np.reshape(x_train, (60000, 784))
x_test_reshaped = np.reshape(x_test, (10000, 784))

# After reshape:
print('x_train_reshaped shape: ', x_train_reshaped.shape)
print('x_test_reshaped shape: ', x_test_reshaped.shape)
x_train_reshaped shape:  (60000, 784)
x_test_reshaped shape:  (10000, 784)
In [ ]:
# Take a look at the first reshaped training image:
print(set(x_train_reshaped[0]))
{0, 1, 2, 3, 9, 11, 14, 16, 18, 23, 24, 25, 26, 27, 30, 35, 36, 39, 43, 45, 46, 49, 55, 56, 64, 66, 70, 78, 80, 81, 82, 90, 93, 94, 107, 108, 114, 119, 126, 127, 130, 132, 133, 135, 136, 139, 148, 150, 154, 156, 160, 166, 170, 171, 172, 175, 182, 183, 186, 187, 190, 195, 198, 201, 205, 207, 212, 213, 219, 221, 225, 226, 229, 238, 240, 241, 242, 244, 247, 249, 250, 251, 252, 253, 255}
In [ ]:
np.set_printoptions(linewidth=np.inf)
print("{}".format(x_train[2020]))
[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0 167 208  19   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0  13 235 254  99   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0  74 254 234   4   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0 154 254 145   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0 224 254  92   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0  51 245 211  13   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   2 169 254 101   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  27 254 254  88   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  72 255 241  15   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  88 254 153   0   0  33  53 155 156 102  15   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 130 254  31   0 128 235 254 254 254 254 186  10   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 190 254  51 178 254 246 213 111 109 186 254 145   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 192 254 229 254 216  90   0   0   0  57 254 234   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 235 254 254 247  85   0   0   0   0  32 254 234   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 235 254 254 118   0   0   0   0   0 107 254 201   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 235 255 254 102  12   0   0   0   8 188 248 119   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0 207 254 254 238 107   0   0  39 175 254 148   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  84 254 248  74  11  32 115 238 254 176  11   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  21 214 254 254 254 254 254 254 132   6   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0  14  96 176 254 254 214  48  12   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]]

Rescale the elements of the reshaped images

In [ ]:
x_train_norm = x_train_reshaped.astype('float32') / 255
x_test_norm = x_test_reshaped.astype('float32') / 255
In [ ]:
# Take a look at the first reshaped and normalized training image:
print(set(x_train_norm[0]))
{0.0, 0.011764706, 0.53333336, 0.07058824, 0.49411765, 0.6862745, 0.101960786, 0.6509804, 1.0, 0.96862745, 0.49803922, 0.11764706, 0.14117648, 0.36862746, 0.6039216, 0.6666667, 0.043137256, 0.05490196, 0.03529412, 0.85882354, 0.7764706, 0.7137255, 0.94509804, 0.3137255, 0.6117647, 0.41960785, 0.25882354, 0.32156864, 0.21960784, 0.8039216, 0.8666667, 0.8980392, 0.7882353, 0.52156866, 0.18039216, 0.30588236, 0.44705883, 0.3529412, 0.15294118, 0.6745098, 0.88235295, 0.99215686, 0.9490196, 0.7647059, 0.2509804, 0.19215687, 0.93333334, 0.9843137, 0.74509805, 0.7294118, 0.5882353, 0.50980395, 0.8862745, 0.105882354, 0.09019608, 0.16862746, 0.13725491, 0.21568628, 0.46666667, 0.3647059, 0.27450982, 0.8352941, 0.7176471, 0.5803922, 0.8117647, 0.9764706, 0.98039216, 0.73333335, 0.42352942, 0.003921569, 0.54509807, 0.67058825, 0.5294118, 0.007843138, 0.31764707, 0.0627451, 0.09411765, 0.627451, 0.9411765, 0.9882353, 0.95686275, 0.83137256, 0.5176471, 0.09803922, 0.1764706}

EXPERIMENT 1: Our dense neural network will consist of 784 input nodes, a hidden layer with 1 node and 10 output nodes (corresponding to the 10 digits). We use mnist.load_data() to get the 70,000 images divided into a set of 60,000 training images and 10,000 test images. We hold back 5,000 of the 60,000 training images for validation. After training the model, we group the 60,000 activation values of the hidden node for the (original) set of training images by the 10 predicted classes and visualize these sets of values using a boxplot. We expect the overlap between the range of values in the "boxes" to be minimal. In addition, we find the pattern that maximally activates the hidden node as a "warm up" exercise for similar analysis we will perform on CNN models in Assignment 2.

In [ ]:
model1 = Sequential([
    Dense(input_shape=[784], units = 1, activation = tf.nn.relu),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model1.summary()
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_6 (Dense)              (None, 1)                 785       
_________________________________________________________________
output_layer (Dense)         (None, 10)                20        
=================================================================
Total params: 805
Trainable params: 805
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model1, "mnist_model.png", show_shapes=True) 
Out[ ]:
In [ ]:
model1.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])
In [ ]:
history = model1.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 3s 2ms/step - loss: 2.0809 - accuracy: 0.1978 - val_loss: 1.8790 - val_accuracy: 0.2424
Epoch 2/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.8460 - accuracy: 0.2632 - val_loss: 1.7946 - val_accuracy: 0.2915
Epoch 3/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7789 - accuracy: 0.3006 - val_loss: 1.7557 - val_accuracy: 0.3228
Epoch 4/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7531 - accuracy: 0.3216 - val_loss: 1.7369 - val_accuracy: 0.3322
Epoch 5/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7364 - accuracy: 0.3317 - val_loss: 1.7249 - val_accuracy: 0.3407
Epoch 6/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7252 - accuracy: 0.3340 - val_loss: 1.7154 - val_accuracy: 0.3339
Epoch 7/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7149 - accuracy: 0.3376 - val_loss: 1.7134 - val_accuracy: 0.3410
Epoch 8/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7029 - accuracy: 0.3459 - val_loss: 1.7068 - val_accuracy: 0.3478
Epoch 9/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7045 - accuracy: 0.3429 - val_loss: 1.7037 - val_accuracy: 0.3498
Epoch 10/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.7063 - accuracy: 0.3395 - val_loss: 1.7018 - val_accuracy: 0.3514
Epoch 11/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.6970 - accuracy: 0.3443 - val_loss: 1.7045 - val_accuracy: 0.3343
Epoch 12/200
1500/1500 [==============================] - 2s 2ms/step - loss: 1.6958 - accuracy: 0.3434 - val_loss: 1.6918 - val_accuracy: 0.3483
In [ ]:
loss1, accuracy1 = model1.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy1 * 100)
313/313 [==============================] - 1s 2ms/step - loss: 1.6992 - accuracy: 0.3499
test set accuracy:  34.99000072479248
In [ ]:
preds1 = model1.predict(x_test_norm)
print('shape of preds: ', preds1.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred1 = np.argmax(preds1[start_index + i])
    actual1 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred1 != actual1:
        col = 'r'
    plt.xlabel('i={} | pred1={} | true={}'.format(start_index + i, pred1, actual1), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds1[index])
plt.show()
In [ ]:
history_dict1 = history.history
history_dict1.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses1 = history.history['loss']
accs1 = history.history['accuracy']
val_losses1 = history.history['val_loss']
val_accs1 = history.history['val_accuracy']
epochs = len(losses1)
In [ ]:
print(val_losses1)
print(val_accs1)
[1.878950834274292, 1.7945926189422607, 1.755731463432312, 1.7369321584701538, 1.724890947341919, 1.7153970003128052, 1.7133634090423584, 1.7068077325820923, 1.7036863565444946, 1.70180082321167, 1.704496145248413, 1.6917636394500732]
[0.24241666495800018, 0.2915000021457672, 0.3227500021457672, 0.3322499990463257, 0.34066668152809143, 0.33391666412353516, 0.3409999907016754, 0.3478333353996277, 0.34983333945274353, 0.351416677236557, 0.33425000309944153, 0.34825000166893005]
In [ ]:
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses1, accs1], [val_losses1, val_accs1], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
In [ ]:
# Get the predicted classes:
pred_classes1 = np.argmax(model1.predict(x_train_norm), axis=-1)
pred_classes1
Out[ ]:
array([1, 1, 9, ..., 1, 6, 1])
In [ ]:
conf_mx1 = tf.math.confusion_matrix(y_train, pred_classes1)
conf_mx1
Out[ ]:
<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[   0, 5050,  185,   95,   38,    0,   12,  208,  253,   82],
       [   0, 6259,   59,  145,   11,    0,    2,   48,  201,   17],
       [   0, 1369,  872,  147,  429,    0,  202, 1680,  657,  602],
       [   0, 5166,  223,  149,   22,    0,    4,  168,  352,   47],
       [   0,   10,   40,    5, 3506,    0,  845,  308,   22, 1106],
       [   0, 4109,  313,  160,   39,    0,   24,  278,  423,   75],
       [   0,  105,   67,   11, 1030,    0, 4162,  194,   42,  307],
       [   0,  140,  669,   37,  338,    0,    5, 3431,  248, 1397],
       [   0, 3993,  452,  225,   34,    0,    8,  395,  644,  100],
       [   0,  138,  158,   16, 2624,    0,  132,  942,   73, 1866]], dtype=int32)>
In [ ]:
conf_mx1 = tf.math.confusion_matrix(y_train, pred_classes1)
conf_mx1
Out[ ]:
<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[   0, 5050,  185,   95,   38,    0,   12,  208,  253,   82],
       [   0, 6259,   59,  145,   11,    0,    2,   48,  201,   17],
       [   0, 1369,  872,  147,  429,    0,  202, 1680,  657,  602],
       [   0, 5166,  223,  149,   22,    0,    4,  168,  352,   47],
       [   0,   10,   40,    5, 3506,    0,  845,  308,   22, 1106],
       [   0, 4109,  313,  160,   39,    0,   24,  278,  423,   75],
       [   0,  105,   67,   11, 1030,    0, 4162,  194,   42,  307],
       [   0,  140,  669,   37,  338,    0,    5, 3431,  248, 1397],
       [   0, 3993,  452,  225,   34,    0,    8,  395,  644,  100],
       [   0,  138,  158,   16, 2624,    0,  132,  942,   73, 1866]], dtype=int32)>
In [ ]:
print("First ten entries of the predictions:\n {}\n".format(pred_classes1[0:10]))
First ten entries of the predictions:
 [1 1 9 1 4 2 1 1 1 4]

In [ ]:
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
In [ ]:
df1 = pd.DataFrame(preds1[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df1.style.format("{:.2%}").background_gradient(cmap=cm)
Out[ ]:
0 1 2 3 4 5 6 7 8 9
0 5.48% 0.66% 23.76% 4.25% 8.46% 6.59% 1.60% 24.69% 9.08% 15.44%
1 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
2 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
3 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
4 0.17% 0.00% 12.59% 0.09% 27.75% 0.29% 12.24% 18.37% 0.54% 27.95%
5 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
6 2.33% 0.14% 23.01% 1.63% 13.96% 3.10% 3.38% 26.40% 4.65% 21.40%
7 1.79% 0.09% 22.17% 1.22% 15.58% 2.44% 4.04% 26.14% 3.76% 22.78%
8 0.12% 0.00% 11.21% 0.06% 29.27% 0.21% 13.97% 16.87% 0.39% 27.90%
9 0.01% 0.00% 4.54% 0.00% 35.37% 0.02% 28.04% 8.37% 0.04% 23.60%
10 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
11 6.99% 1.08% 22.97% 5.61% 6.84% 8.13% 1.19% 23.08% 10.88% 13.22%
12 0.09% 0.00% 10.15% 0.04% 30.41% 0.16% 15.47% 15.67% 0.30% 27.71%
13 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
14 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
15 17.36% 24.24% 4.62% 18.99% 0.27% 14.87% 0.02% 3.43% 15.29% 0.89%
16 0.04% 0.00% 7.72% 0.02% 32.88% 0.07% 19.69% 12.72% 0.15% 26.71%
17 12.14% 3.80% 17.79% 10.75% 3.14% 12.79% 0.43% 16.22% 15.73% 7.21%
18 7.19% 1.14% 22.84% 5.79% 6.65% 8.32% 1.15% 22.86% 11.10% 12.96%
19 0.21% 0.00% 13.42% 0.12% 26.82% 0.36% 11.31% 19.22% 0.65% 27.89%
In [ ]:
def plot_confusion_matrix(matrix):
    """If you prefer color and a colorbar"""
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111)
    cax = ax.matshow(matrix)
    fig.colorbar(cax)
In [ ]:
plt.figure(figsize=(16,8))
plt.matshow(conf_mx1, cmap=plt.cm.Blues,  fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
In [ ]:
def plot_digits(instances, pos, images_per_row=5, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    pos.imshow(image, cmap = 'binary', **options)
    pos.axis("off")
In [ ]:
bplot1 = sns.boxplot(data=df1, width=0.5,palette="colorblind")

EXPERIMENT 2: This time our dense neural network will have 784 input nodes, a hidden layer with 2 nodes and 10 output nodes (corresponding to the 10 digits). For each of the 60,000 images, the output of the two hidden nodes are plotted using a scatterplot. We color code the points according to which of the 10 classes the the output of the two nodes predicts. Ideally, just like in EXPERIMENT 1, the color clusters should have very little overlap. Also compare the accuracy % & confusion matrix of Experiments 1 & 2. Again, the goal is to get more insights.

Compile the DNN Model

In [ ]:
model2 = Sequential([
    Dense(input_shape=[784], units = 2, activation = tf.nn.relu),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model2.summary()
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_7 (Dense)              (None, 2)                 1570      
_________________________________________________________________
output_layer (Dense)         (None, 10)                30        
=================================================================
Total params: 1,600
Trainable params: 1,600
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model2, "mnist_model.png", show_shapes=True) 
Out[ ]:
In [ ]:
model2.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])

Train the DNN Model

In [ ]:
history = model2.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.8795 - accuracy: 0.3363 - val_loss: 1.3818 - val_accuracy: 0.5470
Epoch 2/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.3478 - accuracy: 0.5614 - val_loss: 1.2225 - val_accuracy: 0.5970
Epoch 3/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.2321 - accuracy: 0.5889 - val_loss: 1.1562 - val_accuracy: 0.6164
Epoch 4/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1686 - accuracy: 0.6092 - val_loss: 1.1218 - val_accuracy: 0.6287
Epoch 5/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1500 - accuracy: 0.6181 - val_loss: 1.1032 - val_accuracy: 0.6369
Epoch 6/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1312 - accuracy: 0.6275 - val_loss: 1.0935 - val_accuracy: 0.6395
Epoch 7/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1186 - accuracy: 0.6344 - val_loss: 1.0902 - val_accuracy: 0.6417
Epoch 8/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1161 - accuracy: 0.6348 - val_loss: 1.0842 - val_accuracy: 0.6472
Epoch 9/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1061 - accuracy: 0.6415 - val_loss: 1.0795 - val_accuracy: 0.6439
Epoch 10/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1056 - accuracy: 0.6426 - val_loss: 1.0709 - val_accuracy: 0.6504
Epoch 11/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1067 - accuracy: 0.6429 - val_loss: 1.0641 - val_accuracy: 0.6514
Epoch 12/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.1095 - accuracy: 0.6409 - val_loss: 1.0616 - val_accuracy: 0.6553
Epoch 13/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0845 - accuracy: 0.6487 - val_loss: 1.0591 - val_accuracy: 0.6554
Epoch 14/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0849 - accuracy: 0.6442 - val_loss: 1.0561 - val_accuracy: 0.6558
Epoch 15/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0873 - accuracy: 0.6488 - val_loss: 1.0531 - val_accuracy: 0.6555
Epoch 16/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0769 - accuracy: 0.6500 - val_loss: 1.0507 - val_accuracy: 0.6570
Epoch 17/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0790 - accuracy: 0.6458 - val_loss: 1.0494 - val_accuracy: 0.6582
Epoch 18/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0701 - accuracy: 0.6549 - val_loss: 1.0443 - val_accuracy: 0.6590
Epoch 19/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0639 - accuracy: 0.6554 - val_loss: 1.0442 - val_accuracy: 0.6598
Epoch 20/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0656 - accuracy: 0.6529 - val_loss: 1.0538 - val_accuracy: 0.6574
Epoch 21/200
1500/1500 [==============================] - 3s 2ms/step - loss: 1.0725 - accuracy: 0.6513 - val_loss: 1.0522 - val_accuracy: 0.6572

Evaluate the DNN Model

In [ ]:
loss2, accuracy2 = model2.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy2 * 100)
313/313 [==============================] - 1s 2ms/step - loss: 1.0683 - accuracy: 0.6490
test set accuracy:  64.89999890327454

Making Predictions

In [ ]:
preds2 = model2.predict(x_test_norm)
print('shape of preds: ', preds2.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred2 = np.argmax(preds2[start_index + i])
    actual2 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred2 != actual2:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred2, actual2), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds2[index])
plt.show()

Reviewing Performance

In [ ]:
history_dict2 = history.history
history_dict2.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
history_dict2 = history.history
history_dict2.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses2 = history.history['loss']
accs2 = history.history['accuracy']
val_losses2 = history.history['val_loss']
val_accs2 = history.history['val_accuracy']
epochs = len(losses2)
In [ ]:
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses2, accs2], [val_losses2, val_accs2], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()

Create the confusion matrix

In [ ]:
# Get the predicted classes:
pred_classes2 = np.argmax(model2.predict(x_train_norm), axis=-1)
pred_classes2
Out[ ]:
array([2, 0, 4, ..., 8, 6, 8])
In [ ]:
conf_mx = tf.math.confusion_matrix(y_train, pred_classes2)
conf_mx
Out[ ]:
<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[4581,    0,  480,    6,    5,    1,   59,  701,   80,   10],
       [   9, 6391,   24,   33,   27,    1,    0,   12,  219,   26],
       [ 854,   28, 2987,  716,   91,   40,   82,  384,  620,  156],
       [  52,  205,  506, 4217,   19,   35,    4,   61,  966,   66],
       [   6,  104,    9,    3, 5142,    1,  190,   49,   12,  326],
       [  75,  118,  476,  746,   67,   97,   21,  248, 3043,  530],
       [  58,   15,   10,    0,  346,    0, 4199,  412,   61,  817],
       [ 199,   41,   47,   18,  172,    3,  373, 4218,  422,  772],
       [  76,  279,  333,  499,   78,   69,   19,  321, 3620,  557],
       [  22,   36,   23,   23,  874,    1,  818,  246,  199, 3707]], dtype=int32)>
In [ ]:
print("The first prediction\n {}\n".format(pred_classes2[0]))
The first prediction
 2

In [ ]:
print("First ten entries of the predictions:\n {}\n".format(pred_classes2[0:10]))
First ten entries of the predictions:
 [2 0 4 1 9 2 1 3 1 4]

In [ ]:
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
In [ ]:
df = pd.DataFrame(preds2[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df.style.format("{:.2%}").background_gradient(cmap=cm)
Out[ ]:
0 1 2 3 4 5 6 7 8 9
0 34.70% 0.00% 3.73% 0.01% 0.00% 0.77% 1.64% 55.98% 0.81% 2.36%
1 32.44% 0.00% 65.23% 2.10% 0.00% 0.17% 0.00% 0.00% 0.06% 0.00%
2 0.01% 87.32% 0.19% 1.58% 0.49% 3.76% 0.02% 0.12% 5.52% 0.99%
3 82.88% 0.00% 4.22% 0.00% 0.00% 0.14% 0.02% 12.60% 0.12% 0.03%
4 0.00% 0.01% 0.01% 0.00% 86.84% 0.11% 2.79% 0.39% 0.19% 9.67%
5 0.00% 94.65% 0.08% 1.07% 0.06% 1.62% 0.00% 0.03% 2.31% 0.17%
6 0.00% 0.11% 0.02% 0.01% 86.58% 0.31% 1.72% 0.41% 0.55% 10.29%
7 0.06% 17.44% 0.56% 1.29% 28.95% 9.83% 1.30% 1.99% 15.43% 23.13%
8 0.00% 0.00% 0.00% 0.00% 69.80% 0.01% 17.23% 0.64% 0.02% 12.30%
9 0.14% 0.00% 0.01% 0.00% 0.51% 0.01% 70.55% 9.96% 0.02% 18.80%
10 56.20% 0.00% 34.29% 0.55% 0.00% 3.16% 0.00% 3.33% 2.45% 0.01%
11 9.96% 0.00% 8.46% 0.42% 0.01% 10.67% 2.63% 40.51% 12.94% 14.42%
12 0.46% 0.00% 0.19% 0.00% 2.27% 0.56% 32.37% 17.42% 0.84% 45.89%
13 51.27% 0.00% 18.80% 0.19% 0.00% 3.88% 0.07% 21.71% 3.65% 0.43%
14 0.00% 92.25% 0.14% 1.97% 0.01% 2.31% 0.00% 0.03% 3.16% 0.12%
15 0.46% 0.61% 10.12% 45.52% 0.00% 21.76% 0.00% 0.14% 21.36% 0.02%
16 0.16% 0.00% 0.10% 0.00% 11.89% 0.49% 28.44% 9.33% 0.78% 48.79%
17 55.19% 0.00% 6.06% 0.01% 0.00% 0.76% 0.30% 36.36% 0.73% 0.60%
18 9.63% 0.00% 37.46% 11.62% 0.00% 20.75% 0.00% 1.76% 18.72% 0.05%
19 0.00% 0.28% 0.02% 0.02% 86.12% 0.45% 1.46% 0.42% 0.77% 10.46%

Visualize the confusion matrix

In [ ]:
def plot_confusion_matrix(matrix):
    """If you prefer color and a colorbar"""
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111)
    cax = ax.matshow(matrix)
    fig.colorbar(cax)
In [ ]:
plt.figure(figsize=(16,8))
plt.matshow(conf_mx, cmap=plt.cm.Blues,  fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
In [ ]:
def plot_digits(instances, pos, images_per_row=5, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    pos.imshow(image, cmap = 'binary', **options)
    pos.axis("off")
In [ ]:
cl_a, cl_b = 4, 9
X_aa = x_train_norm[(y_train == cl_a) & (pred_classes2 == cl_a)]
X_ab = x_train_norm[(y_train == cl_a) & (pred_classes2 == cl_b)]
X_ba = x_train_norm[(y_train == cl_b) & (pred_classes2 == cl_a)]
X_bb = x_train_norm[(y_train == cl_b) & (pred_classes2 == cl_b)]

plt.figure(figsize=(6,6))

p1 = plt.subplot(221)
p2 = plt.subplot(222)
p3 = plt.subplot(223)
p4 = plt.subplot(224)

plot_digits(X_aa[:25], p1, images_per_row=5);
plot_digits(X_ab[:25], p2, images_per_row=5);
plot_digits(X_ba[:25], p3, images_per_row=5);  
plot_digits(X_bb[:25], p4, images_per_row=5);


p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")

# plt.savefig("error_analysis_digits_plot_EXP1_valid")

plt.show()
In [ ]:
# display 50 of the 4's classified correctly
plt.figure(figsize=(10,10))
plot_digits(X_aa[:50],plt,images_per_row = 10)
In [ ]:
plt.figure(figsize=(10,10))
plot_digits(X_bb[:50],plt, images_per_row = 10)

Get the activation values of the hidden nodes

In [ ]:
# Extracts the outputs of the 2 layers:
layer_outputs2 = [layer.output for layer in model2.layers]

# Creates a model that will return these outputs, given the model input:
activation_model2 = models.Model(inputs=model2.input, outputs=layer_outputs2)

print(f"There are {len(layer_outputs2)} layers")
layer_outputs2 # description of the layers
There are 2 layers
Out[ ]:
[<KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'dense_7')>,
 <KerasTensor: shape=(None, 10) dtype=float32 (created by layer 'output_layer')>]
In [ ]:
# Get the outputs of all the hidden nodes for each of the 60000 training images
activations2 = activation_model2.predict(x_train_norm)
hidden_layer_activation2 = activations2[0]
output_layer_activations2 = activations2[1]
hidden_layer_activation2.shape   #  each of the 128 hidden nodes has one activation value per training image
Out[ ]:
(60000, 2)
In [ ]:
output_layer_activations2.shape
Out[ ]:
(60000, 10)
In [ ]:
print(f"The maximum activation value of the hidden nodes in the hidden layer is \
{hidden_layer_activation2.max()}")
The maximum activation value of the hidden nodes in the hidden layer is 38.590084075927734
In [ ]:
# Some stats about the output layer as an aside...
np.set_printoptions(suppress = True)  # display probabilities as decimals and NOT in scientific notation
ouput_layer_activation2 = activations2[1]
print(f"The output node has shape {ouput_layer_activation2.shape}")
print(f"The output for the first image are {ouput_layer_activation2[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {ouput_layer_activation2[0].sum()}")
The output node has shape (60000, 10)
The output for the first image are [0.06  0.    0.275 0.121 0.    0.261 0.    0.024 0.256 0.002]
The sum of the probabilities is (approximately) 1.0

Create a DF with the activation values and class labels

In [ ]:
#Get the dataframe of all the node values
activation_data2 = {'actual_class':y_train}
for k in range(0,2): 
    activation_data2[f"act_val_{k}"] = hidden_layer_activation2[:,k]

activation_df2 = pd.DataFrame(activation_data2)
activation_df2.head()
Out[ ]:
actual_class act_val_0 act_val_1
0 5 7.756967 4.144735
1 0 12.252483 10.749725
2 4 0.000000 1.791475
3 1 1.639541 0.000000
4 9 2.452957 6.514253

Visualize with boxplots

In [ ]:
bplot = sns.boxplot(y='act_val_0', x='actual_class', 
                 data=activation_df2[['act_val_0','actual_class']], 
                 width=0.5,
                 palette="colorblind")
In [ ]:
# displaying the range of activation values for each class labels
activation_df2.groupby("actual_class")["act_val_0"].apply(lambda x: [round(min(x.tolist()),2),
 round(max(x.tolist()),2)]).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Out[ ]:
actual_class range_of_act_values
0 0 [0.0, 29.67]
1 1 [0.0, 18.81]
2 2 [0.0, 38.59]
3 3 [0.0, 32.82]
4 4 [0.0, 19.01]
5 5 [0.0, 21.26]
6 6 [0.0, 17.33]
7 7 [0.0, 20.63]
8 8 [0.0, 19.6]
9 9 [0.0, 21.58]

Create a dataframe with pixel values and class labels

In [ ]:
#Get the dataframe of all the pixel values
pixel_data2 = {'actual_class':y_train}
for k in range(0,128): 
    pixel_data2[f"pix_val_{k}"] = x_train_norm[:,k]
pixel_df2 = pd.DataFrame(pixel_data2)
pixel_df2.head()
Out[ ]:
actual_class pix_val_0 pix_val_1 pix_val_2 pix_val_3 pix_val_4 pix_val_5 pix_val_6 pix_val_7 pix_val_8 pix_val_9 pix_val_10 pix_val_11 pix_val_12 pix_val_13 pix_val_14 pix_val_15 pix_val_16 pix_val_17 pix_val_18 pix_val_19 pix_val_20 pix_val_21 pix_val_22 pix_val_23 pix_val_24 pix_val_25 pix_val_26 pix_val_27 pix_val_28 pix_val_29 pix_val_30 pix_val_31 pix_val_32 pix_val_33 pix_val_34 pix_val_35 pix_val_36 pix_val_37 pix_val_38 ... pix_val_88 pix_val_89 pix_val_90 pix_val_91 pix_val_92 pix_val_93 pix_val_94 pix_val_95 pix_val_96 pix_val_97 pix_val_98 pix_val_99 pix_val_100 pix_val_101 pix_val_102 pix_val_103 pix_val_104 pix_val_105 pix_val_106 pix_val_107 pix_val_108 pix_val_109 pix_val_110 pix_val_111 pix_val_112 pix_val_113 pix_val_114 pix_val_115 pix_val_116 pix_val_117 pix_val_118 pix_val_119 pix_val_120 pix_val_121 pix_val_122 pix_val_123 pix_val_124 pix_val_125 pix_val_126 pix_val_127
0 5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2
2 4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 129 columns

In [ ]:
pixel_df2.pix_val_77.value_counts()
Out[ ]:
0.000000    59720
1.000000       25
0.996078       13
0.992157        9
0.050980        6
            ...  
0.392157        1
0.717647        1
0.215686        1
0.925490        1
0.937255        1
Name: pix_val_77, Length: 150, dtype: int64
In [ ]:
pixel_df2.pix_val_78.value_counts()
Out[ ]:
0.000000    59862
1.000000        6
0.141176        4
0.960784        4
0.992157        4
            ...  
0.749020        1
0.717647        1
0.345098        1
0.968627        1
0.654902        1
Name: pix_val_78, Length: 97, dtype: int64

Scatter plot

In [ ]:
plt.figure(figsize=(8, 8))
color = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class",  palette=color, data = pixel_df2, legend="full")
plt.legend(loc='upper left')
Out[ ]:
<matplotlib.legend.Legend at 0x7f9fad28fe90>

EXPERIMENT 3: You can explore with more hidden nodes. Then end up with 1 ‘final’ model. Say the ‘best’ model.

In [ ]:
model3 = Sequential([
    Dense(input_shape=[784], units = 200, activation = tf.nn.relu),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model3.summary()
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_8 (Dense)              (None, 200)               157000    
_________________________________________________________________
output_layer (Dense)         (None, 10)                2010      
=================================================================
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model3, "mnist_model.png", show_shapes=True) 
Out[ ]:

Compile the DNN model

In [ ]:
model3.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])

Train the DNN model

In [ ]:
history = model3.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4179 - accuracy: 0.8789 - val_loss: 0.1484 - val_accuracy: 0.9573
Epoch 2/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.1197 - accuracy: 0.9649 - val_loss: 0.1136 - val_accuracy: 0.9685
Epoch 3/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.0839 - accuracy: 0.9751 - val_loss: 0.1013 - val_accuracy: 0.9727
Epoch 4/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.0662 - accuracy: 0.9812 - val_loss: 0.1045 - val_accuracy: 0.9727
Epoch 5/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.0543 - accuracy: 0.9843 - val_loss: 0.1098 - val_accuracy: 0.9736
Epoch 6/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.0434 - accuracy: 0.9873 - val_loss: 0.1232 - val_accuracy: 0.9694
Epoch 7/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.0376 - accuracy: 0.9889 - val_loss: 0.1062 - val_accuracy: 0.9743
Epoch 8/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.0319 - accuracy: 0.9915 - val_loss: 0.1034 - val_accuracy: 0.9774
Epoch 9/200
1500/1500 [==============================] - 7s 5ms/step - loss: 0.0252 - accuracy: 0.9928 - val_loss: 0.1165 - val_accuracy: 0.9756
Epoch 10/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.0218 - accuracy: 0.9941 - val_loss: 0.1141 - val_accuracy: 0.9758

Evaluate the DNN model

In [ ]:
loss3, accuracy3 = model3.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy3 * 100)
313/313 [==============================] - 1s 2ms/step - loss: 0.1043 - accuracy: 0.9773
test set accuracy:  97.72999882698059

6. Making Predictions

In [ ]:
preds3 = model3.predict(x_test_norm)
print('shape of preds: ', preds3.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred3 = np.argmax(preds3[start_index + i])
    actual3 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred3 != actual3:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred3, actual3), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds3[index])
plt.show()

7. Reviewing Performance

In [ ]:
history_dict3 = history.history
history_dict3.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

Plot performance metrics

In [ ]:
history_dict3 = history.history
history_dict3.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses3 = history.history['loss']
accs3 = history.history['accuracy']
val_losses3 = history.history['val_loss']
val_accs3 = history.history['val_accuracy']
epochs = len(losses3)
In [ ]:
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses3, accs3], [val_losses3, val_accs3], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()

Create the confusion matrix

In [ ]:
# Get the predicted classes:
pred_classes3 = np.argmax(model3.predict(x_train_norm), axis=-1)
pred_classes3
Out[ ]:
array([5, 0, 4, ..., 5, 6, 8])
In [ ]:
conf_mx3 = tf.math.confusion_matrix(y_train, pred_classes3)
conf_mx3
Out[ ]:
<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[5862,    1,    8,    3,    3,   15,    7,    2,   10,   12],
       [   0, 6711,    4,    4,    5,    0,    2,    8,    8,    0],
       [   2,    6, 5914,   14,    3,    2,    1,    6,    9,    1],
       [   0,    1,   12, 6070,    1,   20,    0,    7,   14,    6],
       [   0,    3,    0,    0, 5830,    0,    4,    1,    0,    4],
       [   0,    0,    5,   15,    1, 5378,   11,    0,    6,    5],
       [   1,    0,    2,    0,    3,    7, 5904,    0,    1,    0],
       [   2,    8,   14,    3,    9,    3,    0, 6214,    2,   10],
       [   1,    5,    5,   10,    3,   16,    5,    2, 5799,    5],
       [   3,    3,    0,    6,   87,   14,    0,   22,    8, 5806]], dtype=int32)>
In [ ]:
print("The first prediction\n {}\n".format(pred_classes3[0]))
The first prediction
 5

In [ ]:
print("First ten entries of the predictions:\n {}\n".format(pred_classes3[0:10]))
First ten entries of the predictions:
 [5 0 4 1 9 2 1 3 1 4]

In [ ]:
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
In [ ]:
df3 = pd.DataFrame(preds3[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df.style.format("{:.2%}").background_gradient(cmap=cm)
Out[ ]:
0 1 2 3 4 5 6 7 8 9
0 34.70% 0.00% 3.73% 0.01% 0.00% 0.77% 1.64% 55.98% 0.81% 2.36%
1 32.44% 0.00% 65.23% 2.10% 0.00% 0.17% 0.00% 0.00% 0.06% 0.00%
2 0.01% 87.32% 0.19% 1.58% 0.49% 3.76% 0.02% 0.12% 5.52% 0.99%
3 82.88% 0.00% 4.22% 0.00% 0.00% 0.14% 0.02% 12.60% 0.12% 0.03%
4 0.00% 0.01% 0.01% 0.00% 86.84% 0.11% 2.79% 0.39% 0.19% 9.67%
5 0.00% 94.65% 0.08% 1.07% 0.06% 1.62% 0.00% 0.03% 2.31% 0.17%
6 0.00% 0.11% 0.02% 0.01% 86.58% 0.31% 1.72% 0.41% 0.55% 10.29%
7 0.06% 17.44% 0.56% 1.29% 28.95% 9.83% 1.30% 1.99% 15.43% 23.13%
8 0.00% 0.00% 0.00% 0.00% 69.80% 0.01% 17.23% 0.64% 0.02% 12.30%
9 0.14% 0.00% 0.01% 0.00% 0.51% 0.01% 70.55% 9.96% 0.02% 18.80%
10 56.20% 0.00% 34.29% 0.55% 0.00% 3.16% 0.00% 3.33% 2.45% 0.01%
11 9.96% 0.00% 8.46% 0.42% 0.01% 10.67% 2.63% 40.51% 12.94% 14.42%
12 0.46% 0.00% 0.19% 0.00% 2.27% 0.56% 32.37% 17.42% 0.84% 45.89%
13 51.27% 0.00% 18.80% 0.19% 0.00% 3.88% 0.07% 21.71% 3.65% 0.43%
14 0.00% 92.25% 0.14% 1.97% 0.01% 2.31% 0.00% 0.03% 3.16% 0.12%
15 0.46% 0.61% 10.12% 45.52% 0.00% 21.76% 0.00% 0.14% 21.36% 0.02%
16 0.16% 0.00% 0.10% 0.00% 11.89% 0.49% 28.44% 9.33% 0.78% 48.79%
17 55.19% 0.00% 6.06% 0.01% 0.00% 0.76% 0.30% 36.36% 0.73% 0.60%
18 9.63% 0.00% 37.46% 11.62% 0.00% 20.75% 0.00% 1.76% 18.72% 0.05%
19 0.00% 0.28% 0.02% 0.02% 86.12% 0.45% 1.46% 0.42% 0.77% 10.46%

Visualize the confusion matrix

In [ ]:
def plot_confusion_matrix(matrix):
    """If you prefer color and a colorbar"""
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111)
    cax = ax.matshow(matrix)
    fig.colorbar(cax)
In [ ]:
plt.figure(figsize=(16,8))
plt.matshow(conf_mx3, cmap=plt.cm.Blues,  fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
In [ ]:
def plot_digits(instances, pos, images_per_row=5, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    pos.imshow(image, cmap = 'binary', **options)
    pos.axis("off")
In [ ]:
cl_a, cl_b = 4, 9
X_aa = x_train_norm[(y_train == cl_a) & (pred_classes3 == cl_a)]
X_ab = x_train_norm[(y_train == cl_a) & (pred_classes3 == cl_b)]
X_ba = x_train_norm[(y_train == cl_b) & (pred_classes3 == cl_a)]
X_bb = x_train_norm[(y_train == cl_b) & (pred_classes3 == cl_b)]

plt.figure(figsize=(6,6))

p1 = plt.subplot(221)
p2 = plt.subplot(222)
p3 = plt.subplot(223)
p4 = plt.subplot(224)

plot_digits(X_aa[:25], p1, images_per_row=5);
plot_digits(X_ab[:25], p2, images_per_row=5);
plot_digits(X_ba[:25], p3, images_per_row=5);  
plot_digits(X_bb[:25], p4, images_per_row=5);


p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")

# plt.savefig("error_analysis_digits_plot_EXP1_valid")

plt.show()

First, display 50 of the 4's classified correctly...

In [ ]:
# display 50 of the 4's classified correctly
plt.figure(figsize=(10,10))
plot_digits(X_aa[:50],plt,images_per_row = 10)

Then all the 4's classified as 9's...

In [ ]:
num = X_ab.shape[0]
plt.figure(figsize=(10,10))
plot_digits(X_ab[:num],plt, images_per_row = 10)

Then display 50 of the 9's classified correctly

In [ ]:
plt.figure(figsize=(10,10))
plot_digits(X_bb[:50],plt, images_per_row = 10)

Finally, display al the 9's classified as 4's.

In [ ]:
# display all the 7's classified as 3's
num = X_ba.shape[0]
plt.figure(figsize=(10,10))
plot_digits(X_ba[:num],plt, images_per_row = 10)

8. Analyzing the Activation Values of the Hidden Nodes

Get the activation values of the hidden nodes

In [ ]:
# Extracts the outputs of the 2 layers:
layer_outputs3 = [layer.output for layer in model3.layers]

# Creates a model that will return these outputs, given the model input:
activation_model3 = models.Model(inputs=model3.input, outputs=layer_outputs3)

print(f"There are {len(layer_outputs3)} layers")
layer_outputs3 # description of the layers
There are 2 layers
Out[ ]:
[<KerasTensor: shape=(None, 200) dtype=float32 (created by layer 'dense_8')>,
 <KerasTensor: shape=(None, 10) dtype=float32 (created by layer 'output_layer')>]
In [ ]:
# Get the outputs of all the hidden nodes for each of the 60000 training images
activations3 = activation_model3.predict(x_train_norm)
hidden_layer_activation3 = activations3[0]
output_layer_activations3 = activations3[1]
hidden_layer_activation3.shape   #  each of the 128 hidden nodes has one activation value per training image
Out[ ]:
(60000, 200)
In [ ]:
output_layer_activations3.shape
Out[ ]:
(60000, 10)
In [ ]:
print(f"The maximum activation value of the hidden nodes in the hidden layer is \
{hidden_layer_activation3.max()}")
The maximum activation value of the hidden nodes in the hidden layer is 19.826187133789062
In [ ]:
# Some stats about the output layer as an aside...
np.set_printoptions(suppress = True)  # display probabilities as decimals and NOT in scientific notation
ouput_layer_activation3 = activations3[1]
print(f"The output node has shape {ouput_layer_activation3.shape}")
print(f"The output for the first image are {ouput_layer_activation3[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {ouput_layer_activation3[0].sum()}")
The output node has shape (60000, 10)
The output for the first image are [0.    0.    0.    0.001 0.    0.999 0.    0.    0.    0.   ]
The sum of the probabilities is (approximately) 1.0

Create a dataframe with the activation values and the class labels

In [ ]:
#Get the dataframe of all the node values
activation_data3 = {'actual_class':y_train}
for k in range(0,200): 
    activation_data3[f"act_val_{k}"] = hidden_layer_activation3[:,k]

activation_df3 = pd.DataFrame(activation_data3)
activation_df3.head()
Out[ ]:
actual_class act_val_0 act_val_1 act_val_2 act_val_3 act_val_4 act_val_5 act_val_6 act_val_7 act_val_8 act_val_9 act_val_10 act_val_11 act_val_12 act_val_13 act_val_14 act_val_15 act_val_16 act_val_17 act_val_18 act_val_19 act_val_20 act_val_21 act_val_22 act_val_23 act_val_24 act_val_25 act_val_26 act_val_27 act_val_28 act_val_29 act_val_30 act_val_31 act_val_32 act_val_33 act_val_34 act_val_35 act_val_36 act_val_37 act_val_38 ... act_val_160 act_val_161 act_val_162 act_val_163 act_val_164 act_val_165 act_val_166 act_val_167 act_val_168 act_val_169 act_val_170 act_val_171 act_val_172 act_val_173 act_val_174 act_val_175 act_val_176 act_val_177 act_val_178 act_val_179 act_val_180 act_val_181 act_val_182 act_val_183 act_val_184 act_val_185 act_val_186 act_val_187 act_val_188 act_val_189 act_val_190 act_val_191 act_val_192 act_val_193 act_val_194 act_val_195 act_val_196 act_val_197 act_val_198 act_val_199
0 5 1.204765 0.000000 0.000000 0.000000 0.000000 1.021519 0.000000 0.000000 1.314113 0.000000 0.000000 3.047846 0.000000 0.000000 4.466662 0.115163 0.000000 0.000000 0.000000 0.000000 2.869823 1.123623 0.000000 0.000000 3.896005 1.593759 0.000000 0.000000 0.073945 4.153744 0.658776 2.940801 0.000000 3.618613 0.000000 0.000000 1.202119 0.000000 0.000000 ... 3.220987 0.000000 4.860728 0.000000 0.0 0.0 1.660989 0.000000 0.000000 0.380399 0.000000 1.931330 5.446306 1.400639 1.613296 0.839309 2.498523 0.000000 3.863715 0.000000 2.460718 0.000000 0.000000 0.0 0.000000 3.421399 0.585387 0.000000 0.000000 3.459974 0.000000 1.906776 0.000000 0.106955 0.000000 1.847731 0.000000 1.298136 0.000000 0.000000
1 0 0.000000 0.000000 0.000000 0.000000 0.071368 4.937801 1.023013 3.372933 0.975488 0.387537 0.000000 0.000000 0.741135 0.000000 0.263535 0.000000 0.152046 0.000000 1.801756 0.000000 0.000000 0.000000 1.640307 0.724072 6.084941 4.482925 1.110132 0.022936 0.000000 3.195602 0.000000 0.000000 1.395937 1.138590 2.053701 0.000000 4.047234 0.000000 2.471775 ... 0.000000 0.000000 0.956564 0.000000 0.0 0.0 0.000000 1.136246 0.539402 0.000000 0.000000 0.000000 1.895153 0.084886 0.000000 0.000000 0.000000 5.449880 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.842548 0.000000 0.000000 1.067695 0.000000 0.000000 0.000000 0.000000 0.000000 0.613777 0.000000 0.000000 0.000000 0.000000
2 4 0.000000 1.748989 2.298413 2.102996 0.000000 2.434932 2.219269 3.917169 0.000000 0.000000 1.954222 0.000000 0.560103 0.000000 0.000000 1.737233 3.213876 0.023904 0.000000 2.606196 0.000000 0.000000 0.000000 1.373368 5.173073 0.000000 0.816979 0.871570 0.000000 0.000000 0.509239 0.000000 1.141295 0.000000 2.421555 2.363522 2.769516 2.542663 1.376474 ... 2.645068 0.766219 0.000000 0.002763 0.0 0.0 0.000000 0.000000 2.830308 0.000000 1.985124 0.000000 0.000000 0.000000 2.830784 0.000000 0.000000 0.000000 0.203455 4.378607 1.261986 3.363411 0.000000 0.0 0.000000 0.000000 0.000000 2.253654 1.359293 0.000000 0.262446 0.000000 0.712987 0.000000 0.402733 2.782017 3.592431 0.000000 0.000000 0.000000
3 1 0.000000 0.000000 0.000000 0.000000 0.019741 0.000000 0.000000 0.000000 0.000000 0.143889 0.152653 0.000000 0.903302 0.150160 0.235547 0.415135 0.000000 0.000000 0.684197 0.000000 1.329561 0.072578 1.981808 0.000000 1.588170 0.000000 3.182226 0.000000 0.084296 0.509807 2.303061 0.557964 1.543166 2.455105 0.000000 0.000000 0.000000 3.381430 2.185526 ... 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 3.385043 0.000000 2.186589 1.953897 0.233049 1.595879 0.679481 0.000000 1.061470 1.081446 0.790498 0.331675 0.0 0.000000 2.697443 2.936074 0.424603 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.154509 0.895517 0.000000
4 9 0.000000 0.000000 2.215918 0.000000 0.567469 0.000000 2.711089 0.000000 0.000000 0.928309 0.000000 0.000000 5.477235 0.620808 1.858695 2.149416 1.116122 0.000000 0.000000 0.000000 1.254208 0.000000 0.000000 2.721621 2.742409 0.000000 0.000000 0.437495 0.188736 0.000000 0.000000 0.000000 1.438314 2.413706 2.028067 0.555518 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.811643 2.093363 0.0 0.0 0.000000 0.000000 0.491809 0.000000 1.633112 0.000000 0.253921 0.000000 0.000000 1.203990 0.000000 1.857388 0.000000 0.000000 2.192178 0.086945 0.000000 0.0 1.200776 0.000000 0.437569 2.440629 0.156350 0.000000 0.298602 0.000000 2.859603 0.000000 3.453403 2.948293 1.820704 0.000000 0.000000 1.119482

5 rows × 201 columns

Visualize the activation values with boxplots

In [ ]:
# To see how closely the hidden node activation values correlate with the class labels
# Let us use seaborn for the boxplots this time.
bplot = sns.boxplot(y='act_val_0', x='actual_class', 
                 data=activation_df3[['act_val_0','actual_class']], 
                 width=0.5,
                 palette="colorblind")
In [ ]:
# displaying the range of activation values for each class labels
activation_df3.groupby("actual_class")["act_val_0"].apply(lambda x: [round(min(x.tolist()),2),
 round(max(x.tolist()),2)]).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Out[ ]:
actual_class range_of_act_values
0 0 [0.0, 5.51]
1 1 [0.0, 4.08]
2 2 [0.0, 5.9]
3 3 [0.0, 7.83]
4 4 [0.0, 3.69]
5 5 [0.0, 6.86]
6 6 [0.0, 2.69]
7 7 [0.0, 1.65]
8 8 [0.0, 2.7]
9 9 [0.0, 3.15]

Create a dataframe with the pixel values and class labels

In [ ]:
#Get the dataframe of all the pixel values
pixel_data3 = {'actual_class':y_train}
for k in range(0,154): 
    pixel_data3[f"pix_val_{k}"] = x_train_norm[:,k]
pixel_df3 = pd.DataFrame(pixel_data3)
pixel_df3.head()
Out[ ]:
actual_class pix_val_0 pix_val_1 pix_val_2 pix_val_3 pix_val_4 pix_val_5 pix_val_6 pix_val_7 pix_val_8 pix_val_9 pix_val_10 pix_val_11 pix_val_12 pix_val_13 pix_val_14 pix_val_15 pix_val_16 pix_val_17 pix_val_18 pix_val_19 pix_val_20 pix_val_21 pix_val_22 pix_val_23 pix_val_24 pix_val_25 pix_val_26 pix_val_27 pix_val_28 pix_val_29 pix_val_30 pix_val_31 pix_val_32 pix_val_33 pix_val_34 pix_val_35 pix_val_36 pix_val_37 pix_val_38 ... pix_val_114 pix_val_115 pix_val_116 pix_val_117 pix_val_118 pix_val_119 pix_val_120 pix_val_121 pix_val_122 pix_val_123 pix_val_124 pix_val_125 pix_val_126 pix_val_127 pix_val_128 pix_val_129 pix_val_130 pix_val_131 pix_val_132 pix_val_133 pix_val_134 pix_val_135 pix_val_136 pix_val_137 pix_val_138 pix_val_139 pix_val_140 pix_val_141 pix_val_142 pix_val_143 pix_val_144 pix_val_145 pix_val_146 pix_val_147 pix_val_148 pix_val_149 pix_val_150 pix_val_151 pix_val_152 pix_val_153
0 5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.011765 0.070588
1 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.623529 0.992157 0.623529 0.196078 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
2 4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
3 1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
4 9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000

5 rows × 155 columns

Use a scatter plot to visualize the predicive power of the pixel values at two fixed locations in the image

In [ ]:
plt.figure(figsize=(8, 8))
color = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class",  palette=color, data = pixel_df3, legend="full")
plt.legend(loc='upper left')
Out[ ]:
<matplotlib.legend.Legend at 0x7f9fa0c1d690>

Experiment 3 - second model

In [ ]:
model4 = Sequential([
    Dense(input_shape=[784], units = 50, activation = tf.nn.relu),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model4.summary()
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_9 (Dense)              (None, 50)                39250     
_________________________________________________________________
output_layer (Dense)         (None, 10)                510       
=================================================================
Total params: 39,760
Trainable params: 39,760
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model4, "mnist_model.png", show_shapes=True) 
Out[ ]:

Compile the DNN model

In [ ]:
model4.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])

Train the DNN model

In [ ]:
history = model4.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 4s 2ms/step - loss: 0.5616 - accuracy: 0.8479 - val_loss: 0.2155 - val_accuracy: 0.9397
Epoch 2/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.2153 - accuracy: 0.9372 - val_loss: 0.1743 - val_accuracy: 0.9508
Epoch 3/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.1526 - accuracy: 0.9555 - val_loss: 0.1464 - val_accuracy: 0.9594
Epoch 4/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.1248 - accuracy: 0.9625 - val_loss: 0.1411 - val_accuracy: 0.9613
Epoch 5/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.1089 - accuracy: 0.9689 - val_loss: 0.1377 - val_accuracy: 0.9630
Epoch 6/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.0986 - accuracy: 0.9717 - val_loss: 0.1338 - val_accuracy: 0.9639
Epoch 7/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.0837 - accuracy: 0.9759 - val_loss: 0.1244 - val_accuracy: 0.9660
Epoch 8/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.0796 - accuracy: 0.9771 - val_loss: 0.1246 - val_accuracy: 0.9680
Epoch 9/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.0731 - accuracy: 0.9793 - val_loss: 0.1346 - val_accuracy: 0.9660
Epoch 10/200
1500/1500 [==============================] - 3s 2ms/step - loss: 0.0643 - accuracy: 0.9814 - val_loss: 0.1290 - val_accuracy: 0.9674

Evaluate the DNN model

In [ ]:
loss4, accuracy4 = model4.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy4 * 100)
313/313 [==============================] - 1s 3ms/step - loss: 0.1249 - accuracy: 0.9664
test set accuracy:  96.64000272750854

6. Making Predictions

In [ ]:
preds4 = model4.predict(x_test_norm)
print('shape of preds: ', preds4.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred4 = np.argmax(preds4[start_index + i])
    actual4 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred4 != actual3:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred4, actual4), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds4[index])
plt.show()

7. Reviewing Performance

In [ ]:
history_dict4 = history.history
history_dict4.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

Plot performance metrics

In [ ]:
history_dict4 = history.history
history_dict4.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses4 = history.history['loss']
accs4 = history.history['accuracy']
val_losses4 = history.history['val_loss']
val_accs4 = history.history['val_accuracy']
epochs = len(losses4)
In [ ]:
plt.figure(figsize=(16, 6))
for i, metrics in enumerate(zip([losses4, accs4], [val_losses4, val_accs4], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()

Experiment 3 - Third model

In [ ]:
model5 = Sequential([
    Dense(input_shape=[784], units = 150, activation = tf.nn.relu),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model5.summary()
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_10 (Dense)             (None, 150)               117750    
_________________________________________________________________
output_layer (Dense)         (None, 10)                1510      
=================================================================
Total params: 119,260
Trainable params: 119,260
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model5, "mnist_model.png", show_shapes=True) 
Out[ ]:

Compile the DNN model

In [ ]:
model5.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])

Train the DNN model

In [ ]:
history = model5.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.4338 - accuracy: 0.8768 - val_loss: 0.1681 - val_accuracy: 0.9524
Epoch 2/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.1430 - accuracy: 0.9584 - val_loss: 0.1188 - val_accuracy: 0.9668
Epoch 3/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0919 - accuracy: 0.9733 - val_loss: 0.1121 - val_accuracy: 0.9686
Epoch 4/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0707 - accuracy: 0.9794 - val_loss: 0.0996 - val_accuracy: 0.9724
Epoch 5/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0594 - accuracy: 0.9838 - val_loss: 0.1105 - val_accuracy: 0.9723
Epoch 6/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0472 - accuracy: 0.9863 - val_loss: 0.1019 - val_accuracy: 0.9736
Epoch 7/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0391 - accuracy: 0.9882 - val_loss: 0.1092 - val_accuracy: 0.9737
Epoch 8/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0352 - accuracy: 0.9902 - val_loss: 0.1210 - val_accuracy: 0.9715
Epoch 9/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0290 - accuracy: 0.9926 - val_loss: 0.1201 - val_accuracy: 0.9743
Epoch 10/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0236 - accuracy: 0.9935 - val_loss: 0.1177 - val_accuracy: 0.9732
Epoch 11/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0212 - accuracy: 0.9945 - val_loss: 0.1211 - val_accuracy: 0.9748
Epoch 12/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0183 - accuracy: 0.9952 - val_loss: 0.1237 - val_accuracy: 0.9747
Epoch 13/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.0142 - accuracy: 0.9961 - val_loss: 0.1383 - val_accuracy: 0.9742

Evaluate the DNN model

In [ ]:
loss5, accuracy5 = model5.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy5 * 100)
313/313 [==============================] - 1s 2ms/step - loss: 0.1109 - accuracy: 0.9755
test set accuracy:  97.54999876022339

6. Making Predictions

In [ ]:
preds5 = model5.predict(x_test_norm)
print('shape of preds: ', preds5.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred5 = np.argmax(preds5[start_index + i])
    actual5 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred5 != actual5:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred5, actual5), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds5[index])
plt.show()

7. Reviewing Performance

In [ ]:
history_dict5 = history.history
history_dict5.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

Plot performance metrics

In [ ]:
history_dict5 = history.history
history_dict5.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses5 = history.history['loss']
accs5 = history.history['accuracy']
val_losses5 = history.history['val_loss']
val_accs5 = history.history['val_accuracy']
epochs = len(losses5)
In [ ]:
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses5, accs5], [val_losses5, val_accs5], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()

Create the confusion matrix

In [ ]:
# Get the predicted classes:
pred_classes5 = np.argmax(model5.predict(x_train_norm), axis=-1)
pred_classes5
Out[ ]:
array([5, 0, 4, ..., 5, 6, 8])
In [ ]:
conf_mx5 = tf.math.confusion_matrix(y_train, pred_classes3)
conf_mx5
Out[ ]:
<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[5862,    1,    8,    3,    3,   15,    7,    2,   10,   12],
       [   0, 6711,    4,    4,    5,    0,    2,    8,    8,    0],
       [   2,    6, 5914,   14,    3,    2,    1,    6,    9,    1],
       [   0,    1,   12, 6070,    1,   20,    0,    7,   14,    6],
       [   0,    3,    0,    0, 5830,    0,    4,    1,    0,    4],
       [   0,    0,    5,   15,    1, 5378,   11,    0,    6,    5],
       [   1,    0,    2,    0,    3,    7, 5904,    0,    1,    0],
       [   2,    8,   14,    3,    9,    3,    0, 6214,    2,   10],
       [   1,    5,    5,   10,    3,   16,    5,    2, 5799,    5],
       [   3,    3,    0,    6,   87,   14,    0,   22,    8, 5806]], dtype=int32)>
In [ ]:
print("The first prediction\n {}\n".format(pred_classes5[0]))
The first prediction
 5

In [ ]:
print("First ten entries of the predictions:\n {}\n".format(pred_classes5[0:10]))
First ten entries of the predictions:
 [5 0 4 1 9 2 1 3 1 4]

In [ ]:
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
In [ ]:
df5 = pd.DataFrame(preds5[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df.style.format("{:.2%}").background_gradient(cmap=cm)
Out[ ]:
0 1 2 3 4 5 6 7 8 9
0 34.70% 0.00% 3.73% 0.01% 0.00% 0.77% 1.64% 55.98% 0.81% 2.36%
1 32.44% 0.00% 65.23% 2.10% 0.00% 0.17% 0.00% 0.00% 0.06% 0.00%
2 0.01% 87.32% 0.19% 1.58% 0.49% 3.76% 0.02% 0.12% 5.52% 0.99%
3 82.88% 0.00% 4.22% 0.00% 0.00% 0.14% 0.02% 12.60% 0.12% 0.03%
4 0.00% 0.01% 0.01% 0.00% 86.84% 0.11% 2.79% 0.39% 0.19% 9.67%
5 0.00% 94.65% 0.08% 1.07% 0.06% 1.62% 0.00% 0.03% 2.31% 0.17%
6 0.00% 0.11% 0.02% 0.01% 86.58% 0.31% 1.72% 0.41% 0.55% 10.29%
7 0.06% 17.44% 0.56% 1.29% 28.95% 9.83% 1.30% 1.99% 15.43% 23.13%
8 0.00% 0.00% 0.00% 0.00% 69.80% 0.01% 17.23% 0.64% 0.02% 12.30%
9 0.14% 0.00% 0.01% 0.00% 0.51% 0.01% 70.55% 9.96% 0.02% 18.80%
10 56.20% 0.00% 34.29% 0.55% 0.00% 3.16% 0.00% 3.33% 2.45% 0.01%
11 9.96% 0.00% 8.46% 0.42% 0.01% 10.67% 2.63% 40.51% 12.94% 14.42%
12 0.46% 0.00% 0.19% 0.00% 2.27% 0.56% 32.37% 17.42% 0.84% 45.89%
13 51.27% 0.00% 18.80% 0.19% 0.00% 3.88% 0.07% 21.71% 3.65% 0.43%
14 0.00% 92.25% 0.14% 1.97% 0.01% 2.31% 0.00% 0.03% 3.16% 0.12%
15 0.46% 0.61% 10.12% 45.52% 0.00% 21.76% 0.00% 0.14% 21.36% 0.02%
16 0.16% 0.00% 0.10% 0.00% 11.89% 0.49% 28.44% 9.33% 0.78% 48.79%
17 55.19% 0.00% 6.06% 0.01% 0.00% 0.76% 0.30% 36.36% 0.73% 0.60%
18 9.63% 0.00% 37.46% 11.62% 0.00% 20.75% 0.00% 1.76% 18.72% 0.05%
19 0.00% 0.28% 0.02% 0.02% 86.12% 0.45% 1.46% 0.42% 0.77% 10.46%

Visualize the confusion matrix

In [ ]:
def plot_confusion_matrix(matrix):
    """If you prefer color and a colorbar"""
    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111)
    cax = ax.matshow(matrix)
    fig.colorbar(cax)
In [ ]:
plt.figure(figsize=(16,8))
plt.matshow(conf_mx3, cmap=plt.cm.Blues,  fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
In [ ]:
def plot_digits(instances, pos, images_per_row=5, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    pos.imshow(image, cmap = 'binary', **options)
    pos.axis("off")
In [ ]:
cl_a, cl_b = 4, 9
X_aa = x_train_norm[(y_train == cl_a) & (pred_classes5 == cl_a)]
X_ab = x_train_norm[(y_train == cl_a) & (pred_classes5 == cl_b)]
X_ba = x_train_norm[(y_train == cl_b) & (pred_classes5 == cl_a)]
X_bb = x_train_norm[(y_train == cl_b) & (pred_classes5 == cl_b)]

plt.figure(figsize=(6,6))

p1 = plt.subplot(221)
p2 = plt.subplot(222)
p3 = plt.subplot(223)
p4 = plt.subplot(224)

plot_digits(X_aa[:25], p1, images_per_row=5);
plot_digits(X_ab[:25], p2, images_per_row=5);
plot_digits(X_ba[:25], p3, images_per_row=5);  
plot_digits(X_bb[:25], p4, images_per_row=5);


p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")

# plt.savefig("error_analysis_digits_plot_EXP1_valid")

plt.show()

8. Analyzing the Activation Values of the Hidden Nodes

Get the activation values of the hidden nodes

In [ ]:
# Extracts the outputs of the 2 layers:
layer_outputs5 = [layer.output for layer in model5.layers]

# Creates a model that will return these outputs, given the model input:
activation_model5 = models.Model(inputs=model5.input, outputs=layer_outputs5)

print(f"There are {len(layer_outputs5)} layers")
layer_outputs5 # description of the layers
There are 2 layers
Out[ ]:
[<KerasTensor: shape=(None, 150) dtype=float32 (created by layer 'dense_10')>,
 <KerasTensor: shape=(None, 10) dtype=float32 (created by layer 'output_layer')>]
In [ ]:
# Get the outputs of all the hidden nodes for each of the 60000 training images
activations5 = activation_model5.predict(x_train_norm)
hidden_layer_activation5 = activations5[0]
output_layer_activations5 = activations5[1]
hidden_layer_activation5.shape   #  each of the 128 hidden nodes has one activation value per training image
Out[ ]:
(60000, 150)
In [ ]:
output_layer_activations5.shape
Out[ ]:
(60000, 10)
In [ ]:
print(f"The maximum activation value of the hidden nodes in the hidden layer is \
{hidden_layer_activation5.max()}")
The maximum activation value of the hidden nodes in the hidden layer is 20.522111892700195
In [ ]:
# Some stats about the output layer as an aside...
np.set_printoptions(suppress = True)  # display probabilities as decimals and NOT in scientific notation
ouput_layer_activation5 = activations5[1]
print(f"The output node has shape {ouput_layer_activation5.shape}")
print(f"The output for the first image are {ouput_layer_activation5[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {ouput_layer_activation5[0].sum()}")
The output node has shape (60000, 10)
The output for the first image are [0.    0.    0.    0.003 0.    0.997 0.    0.    0.    0.   ]
The sum of the probabilities is (approximately) 1.0

Create a dataframe with the activation values and the class labels

In [ ]:
#Get the dataframe of all the node values
activation_data5 = {'actual_class':y_train}
for k in range(0,150): 
    activation_data5[f"act_val_{k}"] = hidden_layer_activation5[:,k]

activation_df5 = pd.DataFrame(activation_data5)
activation_df5.head()
Out[ ]:
actual_class act_val_0 act_val_1 act_val_2 act_val_3 act_val_4 act_val_5 act_val_6 act_val_7 act_val_8 act_val_9 act_val_10 act_val_11 act_val_12 act_val_13 act_val_14 act_val_15 act_val_16 act_val_17 act_val_18 act_val_19 act_val_20 act_val_21 act_val_22 act_val_23 act_val_24 act_val_25 act_val_26 act_val_27 act_val_28 act_val_29 act_val_30 act_val_31 act_val_32 act_val_33 act_val_34 act_val_35 act_val_36 act_val_37 act_val_38 ... act_val_110 act_val_111 act_val_112 act_val_113 act_val_114 act_val_115 act_val_116 act_val_117 act_val_118 act_val_119 act_val_120 act_val_121 act_val_122 act_val_123 act_val_124 act_val_125 act_val_126 act_val_127 act_val_128 act_val_129 act_val_130 act_val_131 act_val_132 act_val_133 act_val_134 act_val_135 act_val_136 act_val_137 act_val_138 act_val_139 act_val_140 act_val_141 act_val_142 act_val_143 act_val_144 act_val_145 act_val_146 act_val_147 act_val_148 act_val_149
0 5 0.021335 0.000000 0.000000 0.000000 6.118069 0.000000 4.176269 2.203383 0.0 3.065037 3.501978 0.000000 3.446827 1.646412 0.000000 0.000000 0.000000 0.00000 1.407939 0.0 6.587011 0.561708 1.197813 5.793682 1.637506 0.676362 0.000000 6.721839 2.588464 3.375461 0.579324 0.382005 4.960715 1.919090 0.0 0.000000 3.097274 0.000000 0.589485 ... 0.000000 3.391589 0.000000 0.000000 2.671148 0.000000 0.000000 0.000000 0.0 1.013868 0.000000 3.002311 0.724365 4.210194 0.000000 0.000000 0.136674 0.393969 0.000000 5.444619 0.0 2.007876 0.000000 2.432763 2.438847 4.901770 0.000000 0.000000 0.000000 1.615759 0.953870 0.035747 2.514633 0.000000 0.026345 0.000000 0.000000 0.000000 2.322777 2.478749
1 0 0.000000 0.000000 0.000000 0.778419 3.448115 0.000000 5.110986 0.000000 0.0 4.358601 0.000000 0.000000 0.761191 0.000000 1.937240 0.000000 2.116217 0.00000 4.607646 0.0 0.000000 10.885712 0.000000 3.674819 0.000000 0.390019 0.000000 0.446810 5.814838 0.000000 0.707490 0.679337 0.808389 0.000000 0.0 0.000000 0.000000 0.000000 0.452799 ... 0.000000 0.000000 1.185149 0.000000 3.036625 0.000000 0.000000 0.201305 0.0 4.791813 0.000000 0.400031 0.000000 0.000000 0.000000 0.464424 0.000000 2.590996 0.000000 0.357858 0.0 0.958678 0.000000 0.011693 1.694877 0.000000 0.000000 1.030553 0.000000 0.000000 5.521540 5.094854 0.000000 1.664341 5.018346 0.000000 7.683796 3.290918 1.602242 0.000000
2 4 0.000000 0.000000 0.873983 4.013408 0.000000 2.526649 1.591807 4.159077 0.0 2.644436 0.000000 0.000000 0.000000 4.730081 0.000000 0.216956 2.345498 0.00000 0.000000 0.0 0.703131 4.886840 3.418443 0.000000 2.340282 0.590354 0.000000 0.000000 0.000000 0.000000 0.934186 0.890197 0.000000 0.803801 0.0 0.000000 0.000000 3.244470 0.000000 ... 0.000000 0.000000 4.060915 7.808596 6.706514 0.000000 0.000000 3.052702 0.0 0.141054 0.000000 0.000000 0.000000 0.000000 1.878484 0.000000 0.000000 1.780808 1.426155 0.000000 0.0 6.026755 3.247062 0.000000 0.000000 0.000000 0.000000 5.033863 1.574336 0.000000 3.894231 2.155331 1.706046 0.000000 5.350781 1.360373 1.714992 0.000000 0.000000 0.000000
3 1 0.000000 0.000000 0.544484 0.000000 0.282133 2.059729 3.472667 1.520575 0.0 1.203204 3.235311 0.000000 0.932999 1.206955 0.348987 0.000000 1.917399 0.00000 1.570269 0.0 0.000000 0.000000 1.896236 0.000000 2.838880 0.000000 0.003654 0.000000 0.000000 0.000000 0.662786 3.553588 0.119184 0.536388 0.0 0.000000 0.000000 4.532386 0.709584 ... 0.000000 2.077226 0.000000 0.000000 0.000000 1.304945 0.000000 0.000000 0.0 0.000000 9.834947 0.000000 4.038259 0.000000 0.000000 0.748696 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.211972 2.241202 1.645772 0.830015 1.205213 0.000000 4.838147 0.000000 2.642892 0.000000 0.000000 0.000000 0.000000 0.496651 0.000000
4 9 1.551035 0.750509 0.099300 2.244193 0.000000 2.276080 2.051856 0.000000 0.0 0.000000 2.529167 2.533322 0.000000 0.000000 0.467361 0.861520 2.279547 2.79709 0.414289 0.0 0.494138 0.000000 0.000000 0.000000 5.305924 0.649650 0.000000 0.000000 1.951440 0.000000 3.324059 0.000000 0.000000 0.000000 0.0 1.474728 0.543621 0.401857 0.896037 ... 1.233881 0.000000 0.000000 3.521267 2.791836 0.000000 0.105017 3.175490 0.0 0.000000 0.000000 0.000000 0.000000 0.335623 1.656263 0.015888 0.000000 5.611236 2.895965 0.545756 0.0 3.654221 0.000000 0.356083 0.279533 2.186874 0.000000 6.312462 0.205551 1.468608 0.000000 0.843630 3.130400 0.000000 0.105767 0.000000 0.000000 0.000000 0.000000 0.000000

5 rows × 151 columns

Visualize the activation values with boxplots

In [ ]:
bplot = sns.boxplot(y='act_val_0', x='actual_class', 
                 data=activation_df5[['act_val_0','actual_class']], 
                 width=0.5,
                 palette="colorblind")
In [ ]:
# displaying the range of activation values for each class labels
activation_df5.groupby("actual_class")["act_val_0"].apply(lambda x: [round(min(x.tolist()),2),
 round(max(x.tolist()),2)]).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Out[ ]:
actual_class range_of_act_values
0 0 [0.0, 6.08]
1 1 [0.0, 4.79]
2 2 [0.0, 10.01]
3 3 [0.0, 7.27]
4 4 [0.0, 4.4]
5 5 [0.0, 5.06]
6 6 [0.0, 3.63]
7 7 [0.0, 10.22]
8 8 [0.0, 6.09]
9 9 [0.0, 6.89]

Create a dataframe with the pixel values and class labels

In [ ]:
#Get the dataframe of all the pixel values
pixel_data5 = {'actual_class':y_train}
for k in range(0,154): 
    pixel_data5[f"pix_val_{k}"] = x_train_norm[:,k]
pixel_df5 = pd.DataFrame(pixel_data5)
pixel_df5.head()
Out[ ]:
actual_class pix_val_0 pix_val_1 pix_val_2 pix_val_3 pix_val_4 pix_val_5 pix_val_6 pix_val_7 pix_val_8 pix_val_9 pix_val_10 pix_val_11 pix_val_12 pix_val_13 pix_val_14 pix_val_15 pix_val_16 pix_val_17 pix_val_18 pix_val_19 pix_val_20 pix_val_21 pix_val_22 pix_val_23 pix_val_24 pix_val_25 pix_val_26 pix_val_27 pix_val_28 pix_val_29 pix_val_30 pix_val_31 pix_val_32 pix_val_33 pix_val_34 pix_val_35 pix_val_36 pix_val_37 pix_val_38 ... pix_val_114 pix_val_115 pix_val_116 pix_val_117 pix_val_118 pix_val_119 pix_val_120 pix_val_121 pix_val_122 pix_val_123 pix_val_124 pix_val_125 pix_val_126 pix_val_127 pix_val_128 pix_val_129 pix_val_130 pix_val_131 pix_val_132 pix_val_133 pix_val_134 pix_val_135 pix_val_136 pix_val_137 pix_val_138 pix_val_139 pix_val_140 pix_val_141 pix_val_142 pix_val_143 pix_val_144 pix_val_145 pix_val_146 pix_val_147 pix_val_148 pix_val_149 pix_val_150 pix_val_151 pix_val_152 pix_val_153
0 5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.011765 0.070588
1 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.2 0.623529 0.992157 0.623529 0.196078 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
2 4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
3 1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000
4 9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.000000

5 rows × 155 columns

Use a scatter plot to visualize the predicive power of the pixel values at two fixed locations in the image

In [ ]:
plt.figure(figsize=(8, 8))
color = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class",  palette=color, data = pixel_df5, legend="full")
plt.legend(loc='upper left')
Out[ ]:
<matplotlib.legend.Legend at 0x7f9fa3afad10>

Experiment 3 - extra testing model

In [ ]:
model6 = Sequential([
    Dense(input_shape=[784], units = 150, activation = tf.nn.sigmoid),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
In [ ]:
model6.summary()
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_11 (Dense)             (None, 150)               117750    
_________________________________________________________________
output_layer (Dense)         (None, 10)                1510      
=================================================================
Total params: 119,260
Trainable params: 119,260
Non-trainable params: 0
_________________________________________________________________
In [ ]:
keras.utils.plot_model(model6, "mnist_model.png", show_shapes=True) 
Out[ ]:

Compile the DNN model

In [ ]:
model6.compile(optimizer='rmsprop',           
               loss = 'squared_hinge',
               metrics=['accuracy'])

Train the DNN model

In [ ]:
history = model6.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9927 - accuracy: 0.7991 - val_loss: 0.9307 - val_accuracy: 0.9267
Epoch 2/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9294 - accuracy: 0.9301 - val_loss: 0.9246 - val_accuracy: 0.9407
Epoch 3/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9231 - accuracy: 0.9446 - val_loss: 0.9205 - val_accuracy: 0.9510
Epoch 4/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9201 - accuracy: 0.9517 - val_loss: 0.9179 - val_accuracy: 0.9566
Epoch 5/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9168 - accuracy: 0.9601 - val_loss: 0.9163 - val_accuracy: 0.9601
Epoch 6/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9149 - accuracy: 0.9644 - val_loss: 0.9152 - val_accuracy: 0.9633
Epoch 7/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9132 - accuracy: 0.9681 - val_loss: 0.9144 - val_accuracy: 0.9645
Epoch 8/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9119 - accuracy: 0.9716 - val_loss: 0.9137 - val_accuracy: 0.9653
Epoch 9/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9107 - accuracy: 0.9749 - val_loss: 0.9129 - val_accuracy: 0.9680
Epoch 10/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9095 - accuracy: 0.9780 - val_loss: 0.9125 - val_accuracy: 0.9681
Epoch 11/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9088 - accuracy: 0.9796 - val_loss: 0.9124 - val_accuracy: 0.9688
Epoch 12/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9083 - accuracy: 0.9806 - val_loss: 0.9118 - val_accuracy: 0.9700
Epoch 13/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9074 - accuracy: 0.9830 - val_loss: 0.9117 - val_accuracy: 0.9704
Epoch 14/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9068 - accuracy: 0.9846 - val_loss: 0.9112 - val_accuracy: 0.9726
Epoch 15/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9067 - accuracy: 0.9848 - val_loss: 0.9112 - val_accuracy: 0.9717
Epoch 16/200
1500/1500 [==============================] - 6s 4ms/step - loss: 0.9062 - accuracy: 0.9858 - val_loss: 0.9108 - val_accuracy: 0.9733
Epoch 17/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9059 - accuracy: 0.9862 - val_loss: 0.9109 - val_accuracy: 0.9726
Epoch 18/200
1500/1500 [==============================] - 7s 4ms/step - loss: 0.9054 - accuracy: 0.9876 - val_loss: 0.9109 - val_accuracy: 0.9718

Evaluate the DNN model

In [ ]:
loss6, accuracy6 = model6.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy6 * 100)
313/313 [==============================] - 1s 2ms/step - loss: 0.9104 - accuracy: 0.9735
test set accuracy:  97.35000133514404

6. Making Predictions

In [ ]:
preds6 = model6.predict(x_test_norm)
print('shape of preds: ', preds6.shape)
shape of preds:  (10000, 10)
In [ ]:
plt.figure(figsize = (12, 12))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred6 = np.argmax(preds6[start_index + i])
    actual6 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred6 != actual6:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred6, actual6), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
In [ ]:
index = 17

plt.plot(preds6[index])
plt.show()

7. Reviewing Performance

In [ ]:
history_dict6 = history.history
history_dict6.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

Plot performance metrics

In [ ]:
history_dict6 = history.history
history_dict6.keys()
Out[ ]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [ ]:
losses6 = history.history['loss']
accs6 = history.history['accuracy']
val_losses6 = history.history['val_loss']
val_accs6 = history.history['val_accuracy']
epochs = len(losses6)
In [ ]:
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses6, accs6], [val_losses6, val_accs6], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()

EXPERIMENT 4: Use PCA decomposition to reduce the number of dimensions of our training set of 28x28 dimensional MNIST images from 784 to 154 (with 95% of training images variance lying along these components). We also reduce the number of dimensions of 'best' model from Experiment 3 to 154 inputs nodes and train it on the new lower dimensional data. We then compare the performance of Experiments 3 and 4.

In [ ]:
# from sklearn.decomposition import PCA

# Separating out the features
features = [*pixel_data5][1:] 
x = pixel_df5.loc[:, features].values 

pca = PCA(n_components=154)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data = principalComponents)
In [ ]:
pixel_pca_df = pd.concat([principalDf, pixel_df5[['actual_class']]], axis = 1)
In [ ]:
pixel_pca_df.head()
Out[ ]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 ... 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 actual_class
0 -0.548449 -0.170992 -0.114372 0.056319 -0.004066 0.008820 -0.024104 0.020043 -0.024466 -0.013977 -0.006171 -0.012215 -0.000304 0.012110 -0.009975 0.001588 -0.015117 0.002692 -0.004584 0.001314 -0.000623 0.005842 0.004617 0.000630 -0.002095 0.002103 -0.002527 -0.000334 0.001541 0.000908 0.001402 -0.001581 -0.000559 -0.000307 -0.000901 -0.000103 0.000268 0.000397 -0.000545 -0.000930 ... 0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 -0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 -0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 -0.000000e+00 0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00 0.000000e+00 0.000000e+00 5
1 -0.277966 0.830673 0.402149 0.061926 -0.249843 -0.017566 -0.489576 0.162622 0.327768 -0.067524 0.114482 -0.089158 -0.134790 0.096258 0.085767 -0.031942 0.058948 0.089074 0.034016 -0.009829 -0.043370 -0.015909 0.003659 -0.001881 -0.002657 -0.036122 0.014164 -0.017838 0.024481 -0.016287 0.003374 -0.023407 -0.020374 0.019569 0.004441 0.001188 -0.002869 -0.000368 -0.003961 0.009858 ... -1.530323e-07 1.376956e-06 -9.374788e-07 -1.220277e-07 2.114589e-07 -5.464304e-07 -3.113048e-07 6.879581e-07 1.645207e-07 -4.872568e-07 1.121390e-07 2.513811e-08 -9.589588e-07 -7.433160e-07 1.664150e-06 -3.167405e-07 -7.920415e-07 -4.952032e-07 -9.257695e-07 2.596547e-07 7.156336e-08 4.572174e-07 -3.270148e-07 4.663146e-07 -4.464887e-08 4.680102e-07 1.449335e-07 -1.274425e-07 -2.556986e-07 4.990447e-07 2.027448e-07 -5.238329e-08 -1.581882e-07 1.859987e-08 6.349991e-08 -5.401296e-08 -3.493258e-09 7.321101e-09 -1.233180e-06 0
2 -0.581155 -0.167017 -0.103823 0.095842 0.000643 0.008961 -0.038750 -0.003645 -0.027280 -0.037414 0.008504 -0.010832 0.011745 0.009601 0.004184 0.013923 -0.006159 0.000931 0.000160 0.004694 -0.004483 0.000896 -0.004891 -0.000328 -0.002758 -0.001138 -0.002710 0.002872 -0.000884 -0.001485 0.000236 -0.001112 -0.001632 -0.000478 -0.000189 0.000944 0.000168 -0.000175 -0.000817 -0.000817 ... 5.483406e-06 1.667876e-06 -9.184674e-07 2.241582e-06 -6.816771e-07 3.731073e-06 4.073664e-06 1.559883e-06 -2.278049e-06 1.118597e-07 1.185997e-06 8.527849e-08 -1.265269e-06 -5.236100e-07 -2.312025e-06 -5.902628e-07 1.233449e-06 7.994332e-07 -8.601812e-07 -3.204239e-06 -4.239228e-07 8.230071e-07 -1.183844e-06 -5.378367e-07 8.032429e-07 -6.574517e-07 4.879519e-09 1.265406e-07 4.564076e-07 -2.490823e-07 -2.605170e-07 -1.008450e-08 3.902048e-08 -1.905474e-07 -9.465871e-08 1.518079e-07 8.084286e-09 1.215624e-08 -2.801452e-06 4
3 -0.581151 -0.167017 -0.103855 0.095850 0.000647 0.008963 -0.038744 -0.003649 -0.027274 -0.037408 0.008508 -0.010832 0.011748 0.009598 0.004186 0.013922 -0.006148 0.000928 0.000163 0.004693 -0.004485 0.000892 -0.004895 -0.000324 -0.002758 -0.001138 -0.002709 0.002876 -0.000885 -0.001486 0.000236 -0.001112 -0.001630 -0.000479 -0.000189 0.000943 0.000167 -0.000177 -0.000815 -0.000814 ... 1.885615e-07 2.321109e-07 -3.837167e-07 -5.097778e-07 4.836980e-07 3.548548e-06 3.464673e-06 -4.137087e-06 1.721262e-07 -2.389334e-06 -3.487919e-06 -1.047303e-06 4.218867e-06 4.425461e-06 -3.185742e-07 -2.535074e-07 1.558675e-07 -7.736510e-07 1.941612e-08 -7.412871e-07 -1.435007e-06 6.732448e-07 9.043204e-07 -4.699142e-07 -7.244745e-07 -3.468845e-07 -2.235164e-07 4.217683e-07 -7.092285e-07 -3.660893e-07 -5.087259e-07 -6.536339e-07 3.478068e-07 9.409570e-08 -1.420658e-07 1.215145e-07 -4.207985e-08 -4.389936e-08 9.633114e-07 1
4 -0.581151 -0.167021 -0.103811 0.095840 0.000647 0.008961 -0.038755 -0.003644 -0.027281 -0.037411 0.008509 -0.010830 0.011749 0.009601 0.004183 0.013923 -0.006156 0.000930 0.000163 0.004695 -0.004481 0.000895 -0.004891 -0.000324 -0.002756 -0.001138 -0.002709 0.002873 -0.000886 -0.001485 0.000236 -0.001114 -0.001631 -0.000480 -0.000190 0.000944 0.000167 -0.000174 -0.000816 -0.000815 ... -1.371098e-06 2.416001e-06 -2.019039e-06 4.227900e-06 -2.801625e-06 -1.353290e-06 4.547141e-07 -2.204514e-07 -1.572659e-06 1.444232e-06 -3.882109e-06 5.282678e-07 -7.252142e-07 -9.157538e-07 2.945534e-06 2.857206e-06 -4.754060e-07 -1.280402e-06 1.631552e-06 3.584470e-07 1.101382e-09 2.409541e-07 -3.373225e-08 -2.036683e-08 -7.079782e-07 1.100551e-07 -1.315006e-07 -1.274936e-07 -3.058613e-07 2.086229e-07 6.760575e-08 -1.223789e-07 9.081904e-08 -9.704352e-08 -1.303001e-07 1.198505e-07 -5.557926e-09 -1.517610e-08 2.594403e-06 9

5 rows × 155 columns

In [ ]:
pca.explained_variance_ratio_
Out[ ]:
array([0.34 , 0.181, 0.091, 0.065, 0.052, 0.034, 0.033, 0.024, 0.02 , 0.018, 0.015, 0.013, 0.011, 0.009, 0.008, 0.007, 0.007, 0.006, 0.006, 0.005, 0.005, 0.004, 0.004, 0.003, 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ], dtype=float32)

Use PCA decomposition to reduce activation features from 128 to 2

In [ ]:
# Separating out the features
features = [*activation_data5][1:] 
x = activation_df5.loc[:, features].values 

pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['principal component 1', 'principal component 2'])
principalDf.head()
Out[ ]:
principal component 1 principal component 2
0 -0.250560 -5.059008
1 15.655589 2.693092
2 3.155660 7.599410
3 -5.572780 -3.533298
4 -4.977498 8.978456
In [ ]:
activation_pca_df = pd.concat([principalDf, activation_df5[['actual_class']]], axis = 1)
activation_pca_df.head()
Out[ ]:
principal component 1 principal component 2 actual_class
0 -0.250560 -5.059008 5
1 15.655589 2.693092 0
2 3.155660 7.599410 4
3 -5.572780 -3.533298 1
4 -4.977498 8.978456 9
In [ ]:
pca.explained_variance_ratio_
Out[ ]:
array([0.139, 0.109], dtype=float32)

Use scatterplot to visualize predictive power of two principal component values

In [ ]:
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="principal component 1", y="principal component 2",
    hue="actual_class",
    palette=sns.color_palette("hls", 10),
    data=activation_pca_df,
    legend="full",
    alpha=0.3
)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9fad757b10>

Use PCA decomposition to reduce features from 128 to 3

In [ ]:
# Separating out the features
features = [*activation_data5][1:] # ['act_val_0', 'act_val_1',...]
x = activation_df5.loc[:, features].values 

pca = PCA(n_components=3)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['pca-one', 'pca-two', 'pca-three'])
principalDf.head()
Out[ ]:
pca-one pca-two pca-three
0 -0.250581 -5.058966 11.525914
1 15.655576 2.693126 -0.207022
2 3.155647 7.599390 -2.800363
3 -5.572784 -3.533296 -4.556149
4 -4.977498 8.978468 0.221203
In [ ]:
pca.explained_variance_ratio_
Out[ ]:
array([0.139, 0.109, 0.093], dtype=float32)
In [ ]:
activation_pca_df = pd.concat([principalDf, activation_df5[['actual_class']]], axis = 1)
activation_pca_df.head()
Out[ ]:
pca-one pca-two pca-three actual_class
0 -0.250581 -5.058966 11.525914 5
1 15.655576 2.693126 -0.207022 0
2 3.155647 7.599390 -2.800363 4
3 -5.572784 -3.533296 -4.556149 1
4 -4.977498 8.978468 0.221203 9

Use scatter plot to visualize predictive power of 3 principal componenet values

In [ ]:
# uncomment to to able to rotate the graph...
# %matplotlib notebook   
ax = plt.figure(figsize=(16,10)).gca(projection='3d')
ax.scatter(
    xs=activation_pca_df.loc[:,"pca-one"], 
    ys=activation_pca_df.loc[:,"pca-two"], 
    zs=activation_pca_df.loc[:,"pca-three"], 
    c=activation_pca_df.loc[:,"actual_class"], 
    cmap='tab10'
)
ax.set_xlabel('pca-one')
ax.set_ylabel('pca-two')
ax.set_zlabel('pca-three')
plt.show()

Use t-Distributed Stochastic Neighbor Embedding (t-SNE) to reduce the (activation) features from 128 (= num of hidden nodes) to 2

In [ ]:
N=10000
activation_df_subset = activation_df5.iloc[:N].copy()
activation_df_subset.shape
Out[ ]:
(10000, 151)
In [ ]:
data_subset = activation_df_subset[features].values
data_subset.shape
Out[ ]:
(10000, 150)
In [ ]:
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(data_subset)
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 10000 samples in 0.326s...
[t-SNE] Computed neighbors for 10000 samples in 37.212s...
[t-SNE] Computed conditional probabilities for sample 1000 / 10000
[t-SNE] Computed conditional probabilities for sample 2000 / 10000
[t-SNE] Computed conditional probabilities for sample 3000 / 10000
[t-SNE] Computed conditional probabilities for sample 4000 / 10000
[t-SNE] Computed conditional probabilities for sample 5000 / 10000
[t-SNE] Computed conditional probabilities for sample 6000 / 10000
[t-SNE] Computed conditional probabilities for sample 7000 / 10000
[t-SNE] Computed conditional probabilities for sample 8000 / 10000
[t-SNE] Computed conditional probabilities for sample 9000 / 10000
[t-SNE] Computed conditional probabilities for sample 10000 / 10000
[t-SNE] Mean sigma: 4.284878
[t-SNE] KL divergence after 250 iterations with early exaggeration: 80.746574
[t-SNE] KL divergence after 300 iterations: 2.624784
In [ ]:
activation_df_subset['tsne-2d-one'] = tsne_results[:,0]
activation_df_subset['tsne-2d-two'] = tsne_results[:,1]

plt.figure(figsize=(16,10))
sns.scatterplot(
    x="tsne-2d-one", y="tsne-2d-two",
    hue="actual_class",
    palette=sns.color_palette("hls", 10),
    data=activation_df_subset,
    legend="full",
    alpha=0.3
)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9fa3e222d0>

EXPERIMENT 5: We use a Random Forest classifier to get the relative importance of the 784 features (pixels) of the 28x28 dimensional images in training set of MNIST images and select the top 70 features (pixels). We train our 'best' dense neural network using these 70 features and compare its performance to the the dense neural network models from EXPERIMENTS 3 and 4.

Reducing dimensionality with random forest

In [ ]:
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rnd_clf.fit(x_train_norm,y_train_encoded)
Out[ ]:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)
In [ ]:
def plot_digit(data):
    image = data.reshape(28, 28)
    plt.imshow(image, cmap = 'hot',
               interpolation="nearest")
    plt.axis("off")

plot_digit(rnd_clf.feature_importances_)
cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])
cbar.ax.set_yticklabels(['Not important', 'Very important'])
plt.show()
In [ ]:
n = 70
imp_arr = rnd_clf.feature_importances_
idx = (-imp_arr).argsort()[:n]          # get the indices of the 70 "most important" features/pixels
len(idx)
Out[ ]:
70
In [ ]:
# Create training and test images using just the 70 pixel locations obtained above
train_images_sm = x_train_norm[:,idx]
test_images_sm = x_test_norm[:,idx]
train_images_sm.shape, test_images_sm.shape 
Out[ ]:
((60000, 70), (10000, 70))

Visualize 70 pixels

In [ ]:
# to convert an index n, 0<= n < 784
def pair(n,size):
    x = n//size 
    y = n%size
    return x,y
In [ ]:
plt.imshow(x_train_norm[1].reshape(28,28),cmap='binary')
x, y = np.array([pair(k,28) for k in idx]).T
plt.scatter(x,y,color='red',s=20)
Out[ ]:
<matplotlib.collections.PathCollection at 0x7f9fa3732d50>